import lxml.etree as le
import urllib.request as ur


# # Build the request for the page URL
# request=ur.Request('https://edu.csdn.net')
# # Fetch the page content
# response=ur.urlopen(request).read()
# # Save the page locally
# with open('edu1.html','wb')as f:
#     # Write the raw response bytes to the HTML file
#     html=f.write(response)



# Parse the locally saved page 'edu1.html' and print every first-level
# category followed by (at most) its first three second-level categories.
with open('edu1.html', 'r', encoding='utf-8') as f:
    # Read the whole saved HTML document as text.
    html1 = f.read()

# The file is only needed for the read above, so everything below runs
# after the handle has been closed.

# Turn the HTML text into an element tree that can be queried with XPath.
html_x = le.HTML(html1)

# Each matching <div class="classify_cList"> holds one first-level category.
div_x_s = html_x.xpath('//div[@class="classify_cList"]')

# Collect the heading text and sub-category texts of every category block.
data_s = []
for div_x in div_x_s:
    # First-level title(s): text of ./h3/a (xpath() returns a list).
    category1 = div_x.xpath('./h3/a/text()')
    # Second-level titles: text of every ./div/span/a.
    category2_s = div_x.xpath('./div/span/a/text()')
    data_s.append(
        {
            'category1': category1,
            'category2_s': category2_s,
        }
    )

# Print each first-level title with up to three of its second-level titles.
for data in data_s:
    titles = data['category1']
    # The heading query may match nothing; print an empty heading line
    # instead of failing with IndexError on titles[0].
    print(titles[0] if titles else '')
    # Slicing never raises, so categories with fewer than three
    # sub-categories simply print the ones they have.
    for sub_title in data['category2_s'][:3]:
        print('   ', sub_title)

